#!/bin/bash
#
############################
#                          #
# DuckDuckGo Search Script #
#                          #
############################
#
# A simple script that takes a textual argument, searches DuckDuckGo, and outputs
# a series of links in a text document as DDGresults.txt, and to the command line
#
# Initially created as a component of Category's "Chaffer", inspired by the book
# "Little Brother" by Cory Doctorow - currently a WIP
#


# Usage: <script> "search terms"
#
# Fetches the HTML version of DuckDuckGo's results for the query given in $1,
# extracts the href targets, drops DuckDuckGo's own links, writes the unique
# remaining links to DDGresults.txt in the current directory and prints them.

# File that receives the final list of links.
readonly RESULTS_FILE='DDGresults.txt'

# Path of the downloaded results page; set in main, removed by cleanup.
tmp_page=''

# Print a message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a usage line to stderr and exit with status 2.
usage() {
  printf 'Usage: %s "search terms"\n' "${0##*/}" >&2
  exit 2
}

# Remove the temporary download, if one was created.
cleanup() {
  if [[ -n "$tmp_page" ]]; then
    rm -f -- "$tmp_page"
  fi
}

#######################################
# Turn a free-text query into a URL query value.
# Characters that would otherwise change the meaning of the URL are
# percent-encoded first; spaces then become "+" as in the original script.
# Arguments: $1 - raw search text
# Outputs:   encoded query on stdout
#######################################
encode_query() {
  local q=$1
  # '%' and '#' are escaped in the patterns because an unescaped leading
  # '%' or '#' anchors the match to the end/start of the string instead.
  q=${q//\%/%25}   # must come first so later escapes are not double-encoded
  q=${q//&/%26}
  q=${q//+/%2B}    # a literal plus, as opposed to an encoded space
  q=${q//\#/%23}
  q=${q// /+}
  printf '%s' "$q"
}

#######################################
# Run the search and produce the list of links.
# Globals:   RESULTS_FILE (read), tmp_page (written)
# Arguments: $1 - search text
# Outputs:   links on stdout and in RESULTS_FILE
#######################################
main() {
  local search=${1:-}
  [[ -n "$search" ]] || usage

  local query
  query=$(encode_query "$search")

  # Unpredictable temp file, removed on every exit path.
  tmp_page=$(mktemp) || die "could not create a temporary file"
  trap cleanup EXIT

  # Use html version of DuckDuckGo to get results of the search in html format.
  # The URL is quoted so the shell neither globs the '?' nor splits the query.
  wget -q -O "$tmp_page" "https://duckduckgo.com/html/?q=${query}" \
    || die "failed to download search results from DuckDuckGo"

  # 1. keep only lines that mention href (generally most <a> tags)
  # 2. restore ":", "/" and "-" from their percent-escapes, and strip the
  #    DDG "/l/?kh=-1&amp;uddg=" redirect prefix ('#' is the sed delimiter
  #    wherever the pattern or replacement contains a forward slash)
  # 3. output just the text between href=" and the closing double-quote
  # 4. keep http links, de-duplicate, and drop DuckDuckGo's own links
  grep href "$tmp_page" \
    | sed -e 's/%3A/:/g' \
          -e 's#%2F#/#g' \
          -e 's/%2D/-/g' \
          -e 's#/l/?kh=-1&amp;uddg=##g' \
    | grep -Po '(?<=href=")[^"]*' \
    | grep http \
    | sort -u \
    | grep -viF 'duckduckgo' \
    | grep -vF 'duck.co' > "$RESULTS_FILE"

  # Output list of links
  cat -- "$RESULTS_FILE"
}

main "$@"
